#!/usr/local/bin/bash
# mirrorpg-allsites Version 2.7 2017-01-27 17:00 Author: Sergej Qkowlew
# This file is Public Domain
#
# Builds static mirrors of a fixed list of sites, one configuration section
# per domain. The Sq*/CSSTree helpers and the variables NowDomain, SqVersion
# and SqAddHost are not defined in this file; presumably they come from the
# sourced ./mirrorpg-config and ./mirrorpg-functions -- confirm there.

# Make sure every companion script is present, downloading missing ones.
for i in config functions generate allsites 1site abandoned childs; do
  if [[ ! -s "./mirrorpg-$i" ]]; then
    # -O overwrites a zero-length leftover instead of saving to "name.1".
    wget -O "./mirrorpg-$i" "http://mirrorpg.arda.ru/mirrorpg/mirrorpg-$i"
  fi
  if [[ ! -s "./mirrorpg-$i" ]]; then
    echo "No file mirrorpg-$i here. Download it from http://mirrorpg.arda.ru/mirrorpg/mirrorpg-$i" >&2
    # Explicit failure status: a bare "exit" would report success here.
    exit 1
  fi
done

. ./mirrorpg-config
. ./mirrorpg-functions

# Enter directory $1 or abort the run.
# Every step after a cd writes or deletes files relative to the current
# directory, so continuing after a failed cd would touch the wrong tree.
allsites_enter_dir() {
  if [[ -z "$1" ]] || ! cd -- "$1"; then
    echo "mirrorpg-allsites: cannot enter directory '$1'" >&2
    exit 1
  fi
}

# ==========================================================================
# BEGIN PER-SITE CONFIGURATIONS
# Mirroring them in static format from original domains

# - airsoftrpg.ru --------------------------------------------------------
SqBuildLines airsoftrpg.ru
SqBackStatic "$NowDomain" "$NowDomain" "-E"
SqRetPath=$(pwd)
allsites_enter_dir "$NowDomain"
SqSubstDomainByRootTree
# Escape the dots of "www.<domain>" so it can be used literally in a regex.
NowDom2=$(echo "www.$NowDomain" | perl -pe '~s/\./\\./gi')
SqSubstDomainByRootTree "~s/http:\/\/$NowDom2//gi"
SqOurPicturesTree
SqAddFiles
SqCollectJSCSS "index.php/kunena/index.html"
SqAddFiles
SqCollectJSCSS "./index.html"
SqAddFiles
CSSTree
SqAddFiles
SqJSTree
SqAddFiles
SqAllPicturesTree
SqAddFiles
SqShowFile .404file.php /index.html 404
# NOTE(review): as laid out here only the 403 call carries the PHP snippet
# below -- confirm the 404 call was not meant to receive it as well.
# The snippet uses $_SERVER["..."]: the curly-brace offset form is rejected
# by PHP 8.
SqShowFile .403file.php /index.html 403 '
// /index.php?start=5.html misinterpreted as /index.php/?start=5.html
//if (preg_match("/\/.+?\/\?.*/i", $_SERVER["REQUEST_URI"])) {
//  $CheckFile = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+?)\/(\?)/","\$1\$2",$_SERVER["REQUEST_URI"]);
//  } else { }

// /path/ must be read as /path.1.html or /path.html
if (preg_match("/\/.+\/(\#.*|)\$/i", $_SERVER["REQUEST_URI"])) {
  $CheckFile = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+)\/(\#.*|)/","\$1",$_SERVER["REQUEST_URI"]);
  } else { }

// /path/?start=40(#*) and /path?start=40(#*) must be read as file /path?start=40 or /path?start=40.html
if (preg_match("/\/.+\/\?(limit|)start=\d*(\#.*|)\$/i", $_SERVER["REQUEST_URI"])) {
  $CheckFile1 = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+)\/(\?start=|\?limitstart=)(\d*)(\#.*|)/","\$1?start=\$3.html",$_SERVER["REQUEST_URI"]);
  $CheckFile2 = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+)\/(\?start=\d*|\?limitstart=)(\d*)(\#.*|)/","\$1?limitstart=\$3.html",$_SERVER["REQUEST_URI"]);
  $CheckFile3 = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+)\/(\?start=\d*|\?limitstart=)(\d*)(\#.*|)/","\$1/unread.html",$_SERVER["REQUEST_URI"]);
  if (file_exists($CheckFile1)) { $CheckFile=$CheckFile1; } else {
    if (file_exists($CheckFile2)) { $CheckFile=$CheckFile2; } else {
      if (file_exists($CheckFile3)) { $CheckFile=$CheckFile3; } else {
        $CheckFile = $_SERVER["DOCUMENT_ROOT"] . preg_replace("/(\/.+)\/.*/","\$1",$_SERVER["REQUEST_URI"]);
        }
      }
    }
  } else { }
if (file_exists ($CheckFile . "/unread.html")) { $CheckFile = $CheckFile . "/unread.html"; } else { }
if (file_exists ($CheckFile . ".1.html")) { $CheckFile = $CheckFile . ".1.html"; } else { }
'
allsites_enter_dir "$SqRetPath"
SqReplace
SqAddCsv "Ролевые страйкбольные игры" 5 "http://qkowlew.livejournal.com/164734.html?thread=2190974#t2190974"
SqUpdateCsv

# - assembly-ufa.ru --------------------------------------------------------
SqBuildLines assembly-ufa.ru
SqAddConf
SqBackStatic
SqRobots
allsites_enter_dir "$NowDomain"
SqSubstDomainByRootTree
SqAddQueryReportTree html
echo 'AddHandler application/x-httpd-php .html' > .htaccess
cd ..
SqReplace
SqAddCsv "Ассамблея - Студия исторического танца" 4 "http://qkowlew.livejournal.com/164734.html?thread=2186622#t2186622"
SqUpdateCsv

# - 9satrapy.diary.ru --------------------------------------------------------
SqBuildLines 9satrapy.diary.ru
SqAddConf
SqBackStatic
SqRobots
# Must not continue on failure: the next command is a recursive delete.
allsites_enter_dir "$NowDomain"
rm -rf counter
SqRecodeTree windows-1251 utf-8
SqSubstDomainByRootTree
NowDom2=$(echo "www.$NowDomain" | perl -pe '~s/\./\\./gi')
SqSubstDomainByRootTree "~s/http:\/\/$NowDom2//gi"
SqAllPicturesTree
SqAddFiles
SqAddQueryReportTree htm
SqAddQueryReportTree html
echo 'AddHandler application/x-httpd-php .html .htm' > .htaccess
wget http://static.diary.ru/userdir/3/2/6/2/3262917/rss.xml
cd ..
SqReplace
SqAddCsv

# - farstars.ru ---------------------------------------------------------------
SqBuildLines farstars.ru
SqMakeDirDom
SqRetPath=$(pwd)
allsites_enter_dir "$NowDomain"

# wiki part
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Collects urls from paginated namespaces
#
for i in 0 6; do
  wget -O "namespace$i.htm" "http://$NowDomain/index.php?title=Special%3AAllPages&from=&to=&namespace=$i"
  # NOTE(review): the first substitution below looks truncated (its pattern
  # has no capture group for \1 and the delimiters do not balance); it is
  # reproduced exactly as found -- restore it from a pristine copy.
  perl -e '
$text=join("",<>);
$text=~s/.*?\/http:\/\/farstars.ru\1\n/gmsi;
$text=~s/\&\;/\&/gsmi;
print $text;
' < "namespace$i.htm" | grep AllPages | grep 'from=' | sort -u > "namespace$i.tm1"
  # -f: the list file does not exist on a first run.
  rm -f "namespace$i.txt"
  for j in $(cat "namespace$i.tm1"); do
    wget -O "namespace$i.tmp" "$j"
    SqExtractWikiUrls "namespace$i.tmp" "namespace$i.txt"
  done
done

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Adds urls from non-paginated namespaces
for i in 1 2 3 4 5 7 8 9 10 11 12 13 14 15 102 103 104 105; do
  wget -O "namespace$i.htm" "http://$NowDomain/index.php?title=Special%3AAllPages&from=&to=&namespace=$i"
  SqExtractWikiUrls "namespace$i.htm" "namespace$i.txt"
done

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Download pages in separate .html files
for i in $(cat namespace*.txt); do
  wget --restrict-file-names=nocontrol --no-parent --page-requisites -nH -x -E "http://$NowDomain/$i"
done
rm -f namespace*.???

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Duplicate Page to Page/ if needed
# Guarded: if wiki/ is missing, an unconditional "cd .." afterwards would
# leave the script one level above the site directory.
if cd wiki; then
  for i in *; do
    [[ -d "$i" ]] || continue
    if [[ -s "$i.html" && ! -s "$i/index.html" ]]; then
      cp -l -- "$i.html" "$i/index.html"
    fi
  done
  cd ..
fi

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# .htaccess makes index.php the file shower for anything not found itself.
echo 'ErrorDocument 404 /index.php
ErrorDocument 403 /index.php
' > .htaccess

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Build index.php files shower
# The first line opens PHP and assigns $Comment: the body below closes with
# "?>" and prints $Comment, so without this opener the generated file would
# be served as plain text and $Comment would never be defined.
{
  printf "<?php \$Comment='created by mirrorpg version %s\n%s';\n" "$SqVersion" "$(date)"
  cat <<'INDEX_PHP'
$CheckFile = $_SERVER["DOCUMENT_ROOT"] . $_SERVER["REQUEST_URI"];
if (($_SERVER["REQUEST_URI"] == "/") || ($_SERVER["REQUEST_URI"] == "/index.php")) {
  $CheckFile = $_SERVER["DOCUMENT_ROOT"] . "/wiki/Заглавная_страница.html";
  }
if (file_exists ($CheckFile . ".html")) { $CheckFile = $CheckFile . ".html"; } else { }
if (file_exists ($CheckFile . ".js")) { $CheckFile = $CheckFile . ".js"; } else { }
if (file_exists ($CheckFile . ".css")) { $CheckFile = $CheckFile . ".css"; } else { }
// Resolved before the branch: both the 200 and the 404 path read it.
$sapi_name = php_sapi_name();
if (file_exists ($CheckFile)) {
  $DefType="text/html";
  if (preg_match("/(\/skins\/.*css|gen\=css|\.css)/", $CheckFile)) { $DefType="text/css"; }
  if (preg_match("/(\/skins\/.*js|gen\=js|\.js)/", $CheckFile)) { $DefType="text/javascript"; }
  if ($sapi_name == "cgi" || $sapi_name == "cgi-fcgi") { header("Status: 200 OK"); } else { header($_SERVER["SERVER_PROTOCOL"] . " 200 OK"); }
  header("Content-Type: " . $DefType);
  echo file_get_contents($CheckFile);
  } else {
  if ($sapi_name == "cgi" || $sapi_name == "cgi-fcgi") { header("Status: 404 Not Found"); } else { header($_SERVER["SERVER_PROTOCOL"] . " 404 Not Found"); }
  echo "
SCRIPT_NAME : " . $_SERVER["SCRIPT_NAME"];
  echo "
REQUEST_URI : " . $_SERVER["REQUEST_URI"];
  echo "
QUERY_STRING : " . $_SERVER["QUERY_STRING"];
  echo "
CheckFile : " . $CheckFile;
  echo "
No Page;
$Comment
";
  }
?>
INDEX_PHP
} > index.php

# live part
wget --restrict-file-names=nocontrol -r -l 0 -x -nH -np "http://$NowDomain/live/"
SqSubstDomainByRootTree
allsites_enter_dir "$SqRetPath"
SqRobots
SqReplace
SqAddCsv

# - 10vek.ru --------------------------------------------------------
SqBuildLines 10vek.ru
SqAddConf
SqBackStatic
SqRobots
# /tmp/addfiles.sh collects generated wget commands; presumably SqAddFiles
# consumes it, so the fixed path is kept -- confirm in mirrorpg-functions.
rm -f /tmp/addfiles.sh
allsites_enter_dir "$NowDomain"

# Perl program: normalises CRLF and strips the absolute site prefix.
tenvek_strip_pl='
# deletes http://domain/ part
$fulltext=join("",<>);
$fulltext=~s/\r\n/\n/gsi;
$fulltext=~s/http:\/\/www\.10vek\.ru//gsi;
#$fulltext=~s/(href=|src=)(\"|)http:\/\/(www\.|)\.10vek\.ru\//\1\2\//gsi;
print $fulltext;
'
# Perl program: turns each href/src attribute line into a wget command.
tenvek_links_pl='
# extract urls from html href/src attributes
$fulltext=join("",<>);
$fulltext=~s/\r\n/\n/gsi;
$fulltext=~s/.*?(href|src)=\"(.+?)(\".*?| .*?|)>.*/wget -x -nH http:\/\/www.10vek.ru\2/gi;
print $fulltext;
'
# Perl program: same, for the mp3path parameter of the swf mp3 player.
tenvek_mp3_pl='
# extract urls from mp3 swf player param mp3path
$fulltext=join("",<>);
$fulltext=~s/\r\n/\n/gsi;
$fulltext=~s/.*?(mp3path)=(.+?)\".*/wget -x -nH http:\/\/www.10vek.ru\2/gi;
print $fulltext;
'

for i in *.html *.php; do
  # Skips empty files and the literal pattern left by an unmatched glob.
  [[ -s "$i" ]] || continue
  perl -e "$tenvek_strip_pl" < "$i" > "${i}.bak"
  mv -- "${i}.bak" "$i"
  grep 'src=' < "$i" | perl -e "$tenvek_links_pl" | sort -u | grep -v '#' >> /tmp/addfiles.sh
  grep 'href=' < "$i" | perl -e "$tenvek_links_pl" | sort -u | grep -v '#' >> /tmp/addfiles.sh
  grep 'mp3path=' < "$i" | perl -e "$tenvek_mp3_pl" | sort -u | grep -v '#' >> /tmp/addfiles.sh
done
SqAddFiles

# Second pass: strip the domain prefix again from every page, now including
# any added by the SqAddFiles step above.
for i in *.html *.php; do
  [[ -s "$i" ]] || continue
  perl -e "$tenvek_strip_pl" < "$i" > "${i}.bak"
  mv -- "${i}.bak" "$i"
done
CSSTree www.10vek.ru
SqAddFiles
cd ..
SqReplace
SqAddCsv "Исторический фестиваль Былинный Берег" 5 "http://qkowlew.livejournal.com/164734.html?thread=2181246#t2181246"

# END PER-SITE CONFIGURATIONS
# ==========================================================================

# Warn when the add-host script is unavailable. Written as an if-statement so
# the script does not end with a non-zero status when the script is present;
# the quoted test also reports an empty SqAddHost as missing.
if [[ ! -x "$SqAddHost" ]]; then
  echo "
Script for adding host to webserver Не найден скрипт добавления хоста
config not found. $SqAddHost
Only backup sites happened сделана только копия сайтов"
fi